# Attach the packages this script relies on (pacman::p_load also installs any
# that are missing).
pacman::p_load(tidyverse, data.table, broom,lfe,modelsummary)
# Set modelsummary's LaTeX numeric-format option to "plain".
options("modelsummary_format_numeric_latex" = "plain")
# Remove every object from the current environment so the script starts from an
# empty workspace.
# NOTE(review): when this file is source()d into an interactive session this
# also wipes the caller's objects -- consider relying on a fresh R session.
rm(list=ls())
################################################################################

# Price gap data
# `destination_unit` is the name of the destination aggregation level; it
# selects the input file here and names the output files further down.
destination_unit = 'destination_bloc'
df_gaps = fread(paste0('inputs/data_tradecosts_',destination_unit,'.csv.gz'))

# The table must carry a column that is also called `destination_unit`.
# Check it explicitly: inside `[.data.table` a missing column would silently
# resolve to the global string defined above instead of raising an error.
stopifnot('destination_unit' %in% names(df_gaps))

# Recode 'MIDDLE EAST' as 'ASIA'. `:=` updates the matching rows in place
# (no copy of the table) and is a no-op when nothing matches.
df_gaps[destination_unit %in% c('MIDDLE EAST'), destination_unit := 'ASIA']

# Distance data
# `spatial_id` names the spatial unit (here 'amc'); it selects the distance
# file and the name of that file's id column (`<spatial_id>_id`).
spatial_id = 'amc'
dist_raw = fread(paste0('../../data/shapefiles/clean/distance_',spatial_id,'_country.csv.gz'))

# Harmonise column names with the price-gap data so the two can be merged.
setnames(
  dist_raw,
  old = c(paste0(spatial_id, '_id'), 'destination_continent'),
  new = c('spatial_id', 'destination_unit')
)
dist_raw[, destination_unit := toupper(destination_unit)]

# One row per (spatial_id, destination_unit): the mean of distance_km over all
# rows in that pair, ignoring missing distances.
df_dist = dist_raw[
  ,
  .(distance_km = mean(distance_km, na.rm = TRUE)),
  by = .(spatial_id, destination_unit)
]

# All data
# Attach the mean distance to every price-gap row. merge() keeps only
# (spatial_id, destination_unit) pairs present in both tables; the distance
# column is then renamed to d_ij.
df_main = merge(
  df_gaps,
  df_dist,
  by = c('spatial_id', 'destination_unit')
)
setnames(df_main, 'distance_km', 'd_ij')


###### Regressions

# Build the estimation sample used by the felm() fits.
#   y1, y2     : log of v_pc and log of v_ic (the two outcomes)
#   x          : log of d_ij (distance)
#   spatial_fe : copy of spatial_id, used as a fixed effect
#   ick        : spatial_id x commodity x product_type identifier
#   ickt       : ick x year identifier
#
# copy() is required: `df = df_main` would only alias the data.table, and the
# `:=` calls below would then add these columns to df_main as well.
df = copy(df_main)
df[, `:=`(
  y1 = log(v_pc),
  y2 = log(v_ic),
  x = log(d_ij),
  spatial_fe = spatial_id
)]

# The interacted identifiers are plain concatenations of columns already in
# the table, so they are built directly instead of via unique() + merge().
df[, ick := paste0(spatial_id, '_', commodity, '_', product_type)]
df[, ickt := paste0(ick, '_', year)]

# Sort/key by the identifier columns. This reproduces the row order that the
# former merge()-based construction left behind, so estimates stay
# reproducible against earlier runs.
setkeyv(df, c('spatial_id', 'commodity', 'product_type', 'year'))

# log() of a zero, negative or missing value is -Inf, NaN or NA; keep only
# rows where the regressor and both outcomes are finite.
df_sample = df[is.finite(x) & is.finite(y1) & is.finite(y2)]

# Regressions of each outcome on log distance (x), with progressively richer
# fixed effects. lfe::felm formula layout:
#   outcome ~ regressors | fixed effects | IV part (0 = none) | cluster variable
# Every model is clustered on spatial_id and estimated on df_sample.
#
# Outcome y1 = log(v_pc). Fixed effects by specification:
#   1: commodity
#   2: commodity + product_type
#   3: commodity + product_type + spatial_fe
#   4: commodity + product_type + spatial_fe + year  (used for coef_distance_pc)
#   5: ick  (spatial_id x commodity x product_type)
#   6: ickt (ick x year)
pc_1 = felm(y1 ~ x | commodity | 0 | spatial_id, data=df_sample)
pc_2 = felm(y1 ~ x | commodity + product_type | 0 | spatial_id, data=df_sample)
pc_3 = felm(y1 ~ x | commodity + product_type + spatial_fe | 0 | spatial_id, data=df_sample)
pc_4 = felm(y1 ~ x | commodity + product_type + spatial_fe + year | 0 | spatial_id, data=df_sample)
pc_5 = felm(y1 ~ x | ick | 0 | spatial_id, data=df_sample)
pc_6 = felm(y1 ~ x | ickt | 0 | spatial_id, data=df_sample)

# Same six specifications for outcome y2 = log(v_ic); icu_4 is used for
# coef_distance_ic.
icu_1 = felm(y2 ~ x | commodity | 0 | spatial_id, data=df_sample)
icu_2 = felm(y2 ~ x | commodity + product_type | 0 | spatial_id,  data=df_sample)
icu_3 = felm(y2 ~ x | commodity + product_type + spatial_fe | 0 | spatial_id, data=df_sample)
icu_4 = felm(y2 ~ x | commodity + product_type + spatial_fe + year | 0 | spatial_id, data=df_sample)
icu_5 = felm(y2 ~ x | ick | 0 | spatial_id, data=df_sample)
icu_6 = felm(y2 ~ x | ickt | 0 | spatial_id, data=df_sample)


###### Matrix

# Distance coefficients from specification 4 (commodity, product_type,
# spatial_fe and year fixed effects), with the sign flipped.
# The estimate is looked up by regressor name and column name rather than by
# linear position in the coefficient matrix, so a change to the specification
# fails loudly instead of silently picking up a different number.
coef_distance_pc = -summary(pc_4)$coefficients['x', 'Estimate']
coef_distance_ic = -summary(icu_4)$coefficients['x', 'Estimate']

# Trade-cost terms for every (spatial_id, destination_unit) pair:
#   exp(coef * log(distance_km)), i.e. distance_km raised to the power coef.
# `:=` adds both columns to df_dist in place.
df_dist[, c('tc_pc', 'tc_ic') := list(
  exp(coef_distance_pc * log(distance_km)),
  exp(coef_distance_ic * log(distance_km))
)]

# Write the pairwise trade-cost table as a gzip-compressed CSV.
write.csv(
  df_dist[, list(spatial_id, destination_unit, tc_pc, tc_ic)],
  gzfile(paste0("inputs/parameters_tradecosts_",destination_unit,".csv.gz")),
  row.names = FALSE
)


###### MP

# Rescale v_ic and v_pc by the fitted distance term:
#   mp = exp(log(v) + coef_distance * log(d_ij))
# Work on a copy so the new columns are not added to df_main.
df = copy(df_main)
df[, mp_ic := exp(log(v_ic) + coef_distance_ic * log(d_ij))]
df[, mp_pc := exp(log(v_pc) + coef_distance_pc * log(d_ij))]

# Keep the identifier columns, the two mp_* columns and price_ij, then write
# the result as a gzip-compressed CSV.
df_mp = df[, .(
  year, country, state_id, spatial_id, destination_unit,
  commodity, product_type, mp_pc, mp_ic, price_ij
)]
write.csv(
  df_mp,
  gzfile(paste0("inputs/parameters_marginalproduct_",destination_unit,".csv.gz")),
  row.names = FALSE
)

